Mouse hypothalamic (MERFISH)¶

importing¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import scipy.io
import pickle

import warnings
warnings.filterwarnings('ignore')
import scipy.sparse as sp
import enclus

sc.set_figure_params(dpi=300,fontsize=10,dpi_save=300,facecolor='white',format='pdf')

Data Integration¶

In [2]:
st_data = sc.read('./datasets/ProcessedMERFISH_hypothalamic_st_data.h5ad')
sc_data = sc.read('./datasets/ProcessedMERFISH_hypothalamic_sc_data.h5ad')
if isinstance(sc_data.X, sp.csr_matrix):
    sc_data.X = sc_data.X.toarray()
print(sc_data,st_data)
AnnData object with n_obs × n_vars = 30370 × 27998
    obs: 'cell_type', 'domain_id', 'source'
    var: 'Gene'
    uns: 'log1p' AnnData object with n_obs × n_vars = 64373 × 155
    obs: 'cell_type', 'domain_id', 'source'
    uns: 'log1p'
    obsm: 'spatial'
In [ ]:
enclus_model = enclus.ENCLUS(spatial_data = st_data, sc_data = sc_data,
                    num_layers=3,
                    num_neurons=1024,
                    latent_dim=512,
                    k_nearest=8,
                    num_cov_genes=64,
                    cov_genes=[],
                    num_HVG=2048,
                    spatial_dist="pois",
                    sc_dist="nb",
                    spatial_coeff=1,  
                    sc_coeff=1,   
                    kl_coeff=0.05, 
                    n_clusters=6,
                    tau=0.1, 
                    gamma=0.1,
                    adaptive_weights=True,
                    early_stopping=True,
                    patience=20,
                    num_heads=10,
                    head_dim=128
                    )

enclus_model.train(training_steps=6000,
    batch_size=4096,
    verbose=16,
    init_lr=0.0005,
    decay_steps=4000)

enclus_model.impute_genes()


st_data.obsm['enclus_latent'] = enclus_model.spatial_data.obsm['enclus_latent']

st_data.obsm['imputation'] = enclus_model.spatial_data.obsm['imputation']

sc_data.obsm['enclus_latent'] = enclus_model.sc_data.obsm['enclus_latent']
sc_data shape and st_data shape: (30370, 2189) (64373, 154)
Initializing CVAE
Finished Initializing ENCLUS
Initializing cluster centers...
 | spatial_w: 3.62 sc_w: 8.65 cov_w: 8.41 kl_w: 0.60 cluster_w: 1.42:   6%|▌         | 273/4628 [59:03<15:42:10, 12.98s/it]
Early stopping triggered

Finished imputing missing gene for spatial data! See 'imputation' in obsm of ENCLUS.spatial_data
In [4]:
# st_data.obsm['imputation']
In [35]:
# sc_data.write('./datasets/ENCLUS_hypo_sc.h5ad')
# st_data.write('./datasets/ENCLUS_hypo_st.h5ad')

Evaluate the results with various scores¶

After integration,Visualize the data using UMAP according to their cell types and sources.

In [6]:
# sc_data = sc.read('./datasets/ENCLUS_hypo_sc.h5ad')
# st_data = sc.read('./datasets/ENCLUS_hypo_st.h5ad')
# sc_data,st_data
In [28]:
import anndata as ad
sc.settings.figdir = './Result/hypothalamic/'
use_rep = 'enclus_latent'
adata = ad.concat([st_data,sc_data], join='inner', merge='same')
sc.tl.pca(adata)
sc.pp.neighbors(adata, use_rep=use_rep, n_neighbors=22)
sc.tl.umap(adata,min_dist=0.1, n_components=2)
sc.pl.umap(adata, color=['source','cell_type'],save='enclus-integrated.pdf',wspace=0.3,legend_fontsize=10)
WARNING: saving figure to file Result\hypothalamic\umapenclus-integrated.pdf
No description has been provided for this image

We evaluated the results by F1, ARI, NMI, Batch Entropy and Silhouette scores.

In [32]:
import enclus.metrics
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, f1_score
adata1 = adata[adata.obs['domain_id']==0]
adata2 = adata[adata.obs['domain_id']==1]
y_test = enclus.metrics.label_transfer(adata2, adata1, label='cell_type', rep='X_umap')

f1 = f1_score(adata1.obs['cell_type'], y_test, average='weighted')
ARI = adjusted_rand_score(adata1.obs['cell_type'], y_test)
NMI = normalized_mutual_info_score(adata1.obs['cell_type'], y_test)
print('F1:', f1)
print('ARI:', ARI)
print('NMI:', NMI)
print('total score:',f1+ARI+NMI)
print('Batch Entropy:', enclus.metrics.batch_entropy_mixing_score(adata.obsm['X_umap'], adata.obs['domain_id']))
print('Silhouette:', enclus.metrics.silhouette(adata.obsm['X_umap'], adata.obs['cell_type']))
F1: 0.8644674623565449
ARI: 0.7796801267520229
NMI: 0.757568693254186
total score: 2.401716282362754
Batch Entropy: 0.19221229685557456
Silhouette: 0.6560687

Methods comparison¶

In [2]:
data = {
    "Harmony": [0.800, 0.603, 0.670],
    "Seurat": [0.809, 0.651, 0.659],
    "SCALEX": [0.751, 0.571, 0.573],
    "scVI": [0.817, 0.649, 0.664],
    "uniPort": [0.871, 0.743, 0.732],
    "gimVI": [0.646, 0.626, 0.607],
    "MultiMAP": [0.881, 0.763, 0.768],
    "SpateCV": [0.864, 0.780, 0.758],
}
metrics = [ "F1","ARI", "NMI"]
df = pd.DataFrame(data, index=metrics)

# Calculate the total score for each method and sort them by the total score
df.loc["Total Score"] = df.sum(axis=0)
df_sorted = df.T.sort_values(by="Total Score", ascending=False).drop(columns=["Total Score"])

fig, ax = plt.subplots(figsize=(8, 5), dpi=300)

bottom = np.zeros(len(df_sorted))
colors = ["#CDC1FF", "#FDB7EA", "#86A788"] 

for idx, metric in enumerate(metrics):
    ax.bar(df_sorted.index, df_sorted[metric], bottom=bottom, label=metric, color=colors[idx])
    bottom += df_sorted[metric]
    
for i, (method, row) in enumerate(df_sorted.iterrows()):
    total_score = row.sum()
    ax.text(i, total_score + 0.05, f'{total_score:.3f}', ha='center', fontsize=8, fontweight='bold')

ax.set_ylabel("Total Score", fontsize=12, fontweight='bold')
ax.set_xticklabels(df_sorted.index, rotation=45, ha="right", fontsize=10, fontweight='bold')
ax.set_yticks(np.arange(0, max(bottom) + 0.2, 0.5))
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_linewidth(1)
ax.spines["bottom"].set_linewidth(1)
ax.legend(title="Metrics", fontsize=10, title_fontsize=10, frameon=False, loc='upper right')
# ax.grid(axis='y', linestyle='--', alpha=0.6)
ax.grid(None)
plt.tight_layout()
plt.savefig('./Result/hypothalamic/metrics.pdf')
plt.show()
No description has been provided for this image
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

methods = ["Harmony", "Seurat", "SCALEX", "scVI", "uniPort", "gimVI", "MultiMAP", "SpateCV"]
metrics = ["Batch Entropy", "Silhouette"]
values = np.array([
    [0.153, 0.294, 0.515, 0.437, 0.480, 0.594, 0.514, 0.192],
    [0.492, 0.514, 0.628, 0.623, 0.693, 0.586, 0.670, 0.656]
])

df = pd.DataFrame(values, index=metrics, columns=methods)

fig, ax = plt.subplots(figsize=(5, 3), dpi=300)
cmap = plt.get_cmap("tab20")  
colors = [cmap(i) for i in range(len(methods))]

for i, method in enumerate(methods):
    ax.scatter(df.loc["Batch Entropy", method], df.loc["Silhouette", method], 
               label=method, color=colors[i], s=100)

ax.set_xlabel("Batch Entropy", fontsize=8, fontweight='bold')
ax.set_ylabel("Silhouette", fontsize=8, fontweight='bold')

ax.legend(loc="upper left", fontsize=8, frameon=False, handletextpad=1, bbox_to_anchor=(1.05, 1))
ax.grid(None)

plt.tight_layout()
plt.savefig('./Result/hypothalamic/batch_silh.pdf')
plt.show()
No description has been provided for this image

Visualize sc and st latent¶

In [41]:
import umap.umap_ as umap
fit = umap.UMAP(
    n_neighbors = 15,
    min_dist = 0.1,
    n_components = 2,
)

latent_umap = fit.fit_transform(np.concatenate([st_data.obsm['enclus_latent'], sc_data.obsm['enclus_latent']], axis = 0))

st_data.obsm['latent_umap'] = latent_umap[:st_data.shape[0]]
sc_data.obsm['latent_umap'] = latent_umap[st_data.shape[0]:]

lim_arr = np.concatenate([st_data.obsm['latent_umap'], sc_data.obsm['latent_umap']], axis = 0)


delta = 1
pre = 0.1
xmin = np.percentile(lim_arr[:, 0], pre) - delta
xmax = np.percentile(lim_arr[:, 0], 100 - pre) + delta
ymin = np.percentile(lim_arr[:, 1], pre) - delta
ymax = np.percentile(lim_arr[:, 1], 100 - pre) + delta

color_dict = {'Astrocytes': '#1f77b4',
 'Inhibitory': '#aec7e8',
 'OD Mature': '#ff7f0e',
 'Endothelial': '#ffbb78',
 'Pericytes': '#2ca02c',
 'OD Immature': '#98df8a',
 'Excitatory': '#d62728',
 'Microglia': '#ff9896',
 'Ependymal': '#9467bd',
 'Fibroblast': '#c5b0d5',
 'Mural': '#8c564b',
 'Newly formed': '#c49c94',
 'Macrophage': '#e377c2',
 }
#13
labelnames = ['Astrocytes', 'Inhibitory', 'OD Mature', 'Endothelial', 'Pericytes', 'OD Immature', 'Excitatory', 'Microglia', 'Ependymal',
            'Fibroblast','Mural','Newly formed','Macrophage']

fig = plt.figure(figsize = (13,5),dpi=300)
plt.subplot(121)
sns.scatterplot(x = sc_data.obsm['latent_umap'][:, 0],
                y = sc_data.obsm['latent_umap'][:, 1], hue = sc_data.obs['cell_type'], s = 8, palette = color_dict,
                legend = False)
plt.title("scRNA-seq Latent")
plt.xlim([xmin, xmax])
plt.ylim([ymin, ymax])
plt.axis('off')

plt.subplot(122)
sns.scatterplot(x = st_data.obsm['latent_umap'][:, 0],
                y = st_data.obsm['latent_umap'][:, 1],  hue = st_data.obs['cell_type'], s = 8, palette = color_dict, legend = True)


legend = plt.legend(title = 'Cell Type', prop={'size': 12}, fontsize = '12',  markerscale = 3, ncol = 2, bbox_to_anchor = (1, 1))#, loc = 'lower left')
plt.setp(legend.get_title(),fontsize='12')
plt.title("st_data Latent")
plt.axis('off')
plt.tight_layout()
plt.xlim([xmin, xmax])
plt.ylim([ymin, ymax])
plt.savefig('./Result/hypothalamic/latent.pdf')
plt.show()
No description has been provided for this image
In [ ]:
color_dict = {'Astrocytes': '#1f77b4',
 'Inhibitory': '#aec7e8',
 'OD Mature': '#ff7f0e',
 'Endothelial': '#ffbb78',
 'Pericytes': '#2ca02c',
 'OD Immature': '#98df8a',
 'Excitatory': '#d62728',
 'Microglia': '#ff9896',
 'Ependymal': '#9467bd',
 'Fibroblast': '#c5b0d5',
 'Mural': '#8c564b',
 'Newly formed': '#c49c94',
 'Macrophage': '#e377c2',
 }
st_data.obs['x'] = st_data.obsm['spatial'][:,0]
st_data.obs['y'] = st_data.obsm['spatial'][:,1]
sns.set_context('paper',font_scale=2) 
plt.subplots(figsize=(5,5),dpi=150)
sns.scatterplot(data=st_data.obs, x="x", y="y", hue="cell_type",hue_order=labelnames,s=10,palette=color_dict)
plt.legend(bbox_to_anchor=(1.0,0.98), loc="upper left",framealpha=0,markerscale=1.5)
plt.gca().invert_yaxis()
plt.axis('off')
plt.show()
No description has been provided for this image

Data preprocessing¶

In [8]:
df = pd.read_csv('./datasets/hypothalamic/Moffitt_and_Bambah-Mukku_et_al_merfish_all_cells.csv')

# Extract the rows where 'Animal_ID' is equal to 1
filtered_df = df[df['Animal_ID'] == 1]
filtered_df = filtered_df[filtered_df['Cell_class'] != 'Ambiguous']
# Merge the rows 'OD Mature 1' and 'OD Mature 2' and name it 'OD Mature'
filtered_df.loc[filtered_df['Cell_class'].isin(['OD Mature 1', 'OD Mature 2','OD Mature 3','OD Mature 4']), 'Cell_class'] = 'OD Mature'
filtered_df.loc[filtered_df['Cell_class'].isin(['OD Immature 1', 'OD Immature 2']), 'Cell_class'] = 'OD Immature'
filtered_df.loc[filtered_df['Cell_class'].isin(['Endothelial 1', 'Endothelial 2','Endothelial 3']), 'Cell_class'] = 'Endothelial'
filtered_df.loc[filtered_df['Cell_class'].isin(['Astrocyte']), 'Cell_class'] = 'Astrocytes'

print(filtered_df['Cell_class'].unique())

expression_columns = df.columns[filtered_df.columns.get_loc('Ace2'):]
# Check which genes (columns) contain NaN
nan_genes = filtered_df[expression_columns].isna().any()
nan_gene = nan_genes[nan_genes].index[0]

# Columns to be deleted, including genes containing NaN and the 'Blank' column
columns_to_drop = [nan_gene] + ['Blank_1', 'Blank_2', 'Blank_3', 'Blank_4', 'Blank_5']


filtered_df = filtered_df.drop(columns=columns_to_drop)

# Select the required columns
# 1. Cell_ID and Cell_class will be used for obs
# 2. Centroid_X and Centroid_Y will be used for spatial
# 3. From Ace2 to the last column are gene expression data
expression_columns = filtered_df.columns[filtered_df.columns.get_loc('Ace2'):]

gene_names = expression_columns.tolist()

# Create AnnData object
# st_data.obs contains Cell_ID and Cell_class
# st_data.spatial contains Centroid_X and Centroid_Y
# st_data.X contains gene expression data from Ace2 to the last row
st_data = sc.AnnData(
    X=filtered_df[expression_columns].values,  
    obs=filtered_df[['Cell_ID', 'Cell_class']].set_index('Cell_ID'),  # Cell_ID 和 Cell_class
    obsm={'spatial': filtered_df[['Centroid_X', 'Centroid_Y']].values}  
)

st_data.var_names = gene_names

st_data.obs.rename(columns={'Cell_class': 'cell_type'}, inplace=True)


print(st_data)

# st_data.write('./datasets/hypothalamic/MERFISH_hypothalamic_st_data.h5ad')
['Astrocytes' 'Inhibitory' 'OD Mature' 'Endothelial' 'Pericytes'
 'OD Immature' 'Excitatory' 'Microglia' 'Ependymal']
AnnData object with n_obs × n_vars = 64373 × 155
    obs: 'cell_type'
    obsm: 'spatial'
In [3]:
import pandas as pd
import scipy.io
import scanpy as sc

# Read the gene expression matrix (.mtx file)
matrix = scipy.io.mmread('./datasets/hypothalamic/GSE113576_matrix.mtx').tocsc().T

# Read the barcodes and genes files
barcodes = pd.read_csv('./datasets/hypothalamic/GSE113576_barcodes.tsv', header=None, sep='\t')
genes = pd.read_csv('./datasets/hypothalamic/GSE113576_genes.tsv', header=None, sep='\t')

sc_data = sc.AnnData(X=matrix)

# Set the index of obs to barcodes
sc_data.obs = pd.DataFrame(index=barcodes[0].values)

sc_data.var['Gene'] = genes[1].values
sc_data.var_names = genes[1].values

# Read the cell type annotation file (Excel file)
cell_type_df = pd.read_excel('./datasets/hypothalamic/NIHMS1024025-supplement-Table_S1.xlsx')
cell_type_dict = cell_type_df[['Cell_ID', 'Cell_class']]

# Merge cell type information into the obs of the AnnData object
sc_data.obs = sc_data.obs.merge(cell_type_dict, left_index=True, right_on='Cell_ID', how='left')

sc_data.obs['Cell_class'] = sc_data.obs['Cell_class'].replace({
    'Immature oligodendrocyte': 'OD Immature',
    'Mature oligodendrocyte': 'OD Mature',
    'Newly formed oligodendrocyte':'Newly formed'
})

# Delete rows with values 'Ambiguous' and 'Unstable' in Cell_class
sc_data = sc_data[~sc_data.obs['Cell_class'].isin(['Ambiguous', 'Unstable'])]
sc_data.obs.set_index('Cell_ID', inplace=True)
sc_data.obs.rename(columns={'Cell_class': 'cell_type'}, inplace=True)
print(sc_data.obs['cell_type'].unique())

print(sc_data)
# sc_data.write('./datasets/hypothalamic/MERFISH_hypothalamic_sc_data.h5ad')
['Fibroblast' 'Excitatory' 'Inhibitory' 'OD Mature' 'Endothelial'
 'OD Immature' 'Microglia' 'Mural' 'Newly formed' 'Astrocytes'
 'Macrophage' 'Ependymal']
AnnData object with n_obs × n_vars = 30370 × 27998
    obs: 'cell_type'
    var: 'Gene'
In [2]:
adata_merfish = sc.read('./datasets/hypothalamic/MERFISH_hypothalamic_st_data.h5ad')
adata_rna = sc.read('./datasets/hypothalamic/MERFISH_hypothalamic_sc_data.h5ad')
adata_merfish,adata_rna
Out[2]:
(AnnData object with n_obs × n_vars = 64373 × 155
     obs: 'cell_type'
     obsm: 'spatial',
 AnnData object with n_obs × n_vars = 30370 × 27998
     obs: 'cell_type'
     var: 'Gene')
In [3]:
adata_merfish.obs['domain_id'] = 0
adata_merfish.obs['domain_id'] = adata_merfish.obs['domain_id'].astype('category')
adata_merfish.obs['source'] = 'MERFISH'

adata_rna.obs['domain_id'] = 1
adata_rna.obs['domain_id'] = adata_rna.obs['domain_id'].astype('category')
adata_rna.obs['source'] = 'RNA'

print(adata_rna.obs)
print(adata_merfish.obs)
                     cell_type domain_id source
Cell_ID                                        
AAACCTGAGATGTGGC-1  Fibroblast         1    RNA
AAACCTGCACACAGAG-1  Excitatory         1    RNA
AAACCTGCACTACAGT-1  Inhibitory         1    RNA
AAACCTGTCAGGATCT-1  Excitatory         1    RNA
AAACCTGTCGCACTCT-1   OD Mature         1    RNA
...                        ...       ...    ...
TTTGGTTGTTATCACG-6  Inhibitory         1    RNA
TTTGGTTGTTATTCTC-6  Inhibitory         1    RNA
TTTGTCAGTTCCGTCT-6  Inhibitory         1    RNA
TTTGTCATCGTGGGAA-6  Inhibitory         1    RNA
TTTGTCATCTTTACAC-6  Excitatory         1    RNA

[30370 rows x 3 columns]
                                        cell_type domain_id   source
Cell_ID                                                             
6749ccb4-2ed1-4029-968f-820a287f43c8   Astrocytes         0  MERFISH
6cac74bd-4ea7-4701-8701-42563cc65eb8   Inhibitory         0  MERFISH
9f29bd57-16a5-4b26-b9f5-37598809da9e   Inhibitory         0  MERFISH
d7eb4e0b-276e-47e3-a55c-0b033180a2fe   Inhibitory         0  MERFISH
54434f3a-eba9-4aec-af35-c9d317ffa1d5   Inhibitory         0  MERFISH
...                                           ...       ...      ...
b8be6c44-1b3a-4eb8-9cf9-827772348add    OD Mature         0  MERFISH
7d6f8abd-4529-44a9-bebb-40f42ab0ccb1    OD Mature         0  MERFISH
9e7e7c84-7dcc-4eef-a9ed-9fc80d55e938  OD Immature         0  MERFISH
6b666f81-7b73-4100-9e02-b5381b39f0f3    OD Mature         0  MERFISH
fdcddd97-7701-462a-b48f-979111245bd5    OD Mature         0  MERFISH

[64373 rows x 3 columns]
In [4]:
adata_rna = adata_rna[:, ~adata_rna.var_names.duplicated()]
adata_cm = adata_merfish.concatenate(adata_rna, join='inner', batch_key='domain_id')
print(adata_cm.obs)
                                         cell_type domain_id   source
Cell_ID                                                              
6749ccb4-2ed1-4029-968f-820a287f43c8-0  Astrocytes         0  MERFISH
6cac74bd-4ea7-4701-8701-42563cc65eb8-0  Inhibitory         0  MERFISH
9f29bd57-16a5-4b26-b9f5-37598809da9e-0  Inhibitory         0  MERFISH
d7eb4e0b-276e-47e3-a55c-0b033180a2fe-0  Inhibitory         0  MERFISH
54434f3a-eba9-4aec-af35-c9d317ffa1d5-0  Inhibitory         0  MERFISH
...                                            ...       ...      ...
TTTGGTTGTTATCACG-6-1                    Inhibitory         1      RNA
TTTGGTTGTTATTCTC-6-1                    Inhibitory         1      RNA
TTTGTCAGTTCCGTCT-6-1                    Inhibitory         1      RNA
TTTGTCATCGTGGGAA-6-1                    Inhibitory         1      RNA
TTTGTCATCTTTACAC-6-1                    Excitatory         1      RNA

[94743 rows x 3 columns]
In [5]:
import enclus.metrics
sc.pp.normalize_total(adata_cm)
sc.pp.log1p(adata_cm)
sc.pp.highly_variable_genes(adata_cm, n_top_genes=2000, batch_key='domain_id', inplace=False, subset=True)
enclus.metrics.batch_scale(adata_cm)
# adata_cm.write('./datasets/MERFISH_hypothalamic_cm_data.h5ad')
In [7]:
# adata_cm = sc.read('./datasets/MERFISH_hypothalamic_cm_data.h5ad')
sc.settings.figdir = './Result/hypothalamic/'
adata_cm_copy = adata_cm.copy()
sc.pp.pca(adata_cm_copy)
sc.pp.neighbors(adata_cm_copy)
sc.tl.umap(adata_cm_copy, min_dist=0.1)
sc.pl.umap(adata_cm_copy, color=['source', 'cell_type'],save='unintegrate.pdf',wspace=0.3,legend_fontsize=10)
WARNING: saving figure to file Result\hypothalamic\umapunintegrate.pdf
No description has been provided for this image

Save the preprocessed data for integration and online prediction

In [5]:
# import enclus.metrics
# st_data = sc.read('./datasets/hypothalamic/MERFISH_hypothalamic_st_data.h5ad')
# sc_data = sc.read('./datasets/hypothalamic/MERFISH_hypothalamic_sc_data.h5ad')

# sc_data.obs_names_make_unique()
# st_data.obs_names_make_unique()

# sc_data.var_names_make_unique()
# sc_data.var_names_make_unique()

# st_data.obs['domain_id'] = 0
# st_data.obs['domain_id'] = st_data.obs['domain_id'].astype('category')
# st_data.obs['source'] = 'MERFISH'

# sc_data.obs['domain_id'] = 1
# sc_data.obs['domain_id'] = sc_data.obs['domain_id'].astype('category')
# sc_data.obs['source'] = 'RNA'

# sc.pp.normalize_total(st_data)
# sc.pp.log1p(st_data)
# sc.pp.highly_variable_genes(st_data, n_top_genes=2000, inplace=False, subset=True)
# enclus.metrics.batch_scale(st_data)

# sc.pp.normalize_total(sc_data)
# sc.pp.log1p(sc_data)
# sc.pp.highly_variable_genes(sc_data, n_top_genes=2000, inplace=False, subset=True)
# enclus.metrics.batch_scale(sc_data)

# print(sc_data,st_data)
# st_data.write_h5ad('./datasets/ProcessedMERFISH_hypothalamic_st_data.h5ad')
# sc_data.write_h5ad('./datasets/ProcessedMERFISH_hypothalamic_sc_data.h5ad')
AnnData object with n_obs × n_vars = 30370 × 27998
    obs: 'cell_type', 'domain_id', 'source'
    var: 'Gene'
    uns: 'log1p' AnnData object with n_obs × n_vars = 64373 × 155
    obs: 'cell_type', 'domain_id', 'source'
    uns: 'log1p'
    obsm: 'spatial'